import gym
import random
import numpy as np
import time


# Roll out a uniformly random policy on Taxi-v3 for a few short episodes,
# rendering each step and printing every observation that comes back.
NUM_EPISODES = 20           # number of episodes to run
MAX_STEPS_PER_EPISODE = 10  # hard cap on steps taken within one episode

env = gym.make("Taxi-v3")
try:
    for i_episode in range(NUM_EPISODES):
        observation = env.reset()  # re-initialize the environment each episode
        print(observation)
        for t in range(MAX_STEPS_PER_EPISODE):
            env.render('human')  # display the current state
            action = env.action_space.sample()  # choose an action at random
            # NOTE(review): this unpacks four values from step(); newer Gym
            # releases reportedly return five (terminated and truncated as
            # separate flags) -- confirm against the installed Gym version.
            observation, reward, terminated, info = env.step(action)
            print(observation)
            if terminated:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Always release the environment (and any render resources), even when
    # reset/render/step raises part-way through the rollout.
    env.close()